* Firm organization with multiple establishments
* Appendix A: Data preparation for Tables A.2-A.6, 2012 cross-section

* Start from an empty session and switch off the -more- pager
clear all
set more off
set matsize 2000

* Close any log that is still open, then open this script's log
capture log close
log using log/27_cmrh-cs_replication_firm_data.log, replace

********************************************************************************

* Load the 2012 observations of the panel and list the variables
use data/Panel.dta if jahr == 2012, clear
describe

* Employee characteristics that are not needed in the analyses
drop grund stib frau foreign ein_erw nbr_occ tage_wz tage_occ ///
	l_beruf f_beruf l_tentgelt f_tentgelt d_age

* Firm characteristics that are not needed in the analyses
drop mean_wage verysmall grd_dat lzt_dat ao_bula

* Put limit, cens and lnw directly behind tentgelt
order limit cens lnw, after(tentgelt)
	
********************************************************************************
***	Sample restriction *********************************************************
********************************************************************************

*	Minimum firm size
* empl_unt = number of non-missing persnr per firm-year.
* A firm is kept only if its smallest firm-year count is at least 10.
bysort untid jahr: egen empl_unt = count(persnr)
by untid: egen min_empl = min(empl_unt)
qui drop if min_empl < 10
drop min_empl

*	Cleaning: social security limit
* Where cens == 1 the wage is overwritten with limit; the log wage is then
* rebuilt from the adjusted wage and the helper variables are removed.
qui replace tentgelt = limit if cens == 1
qui replace lnw = log(tentgelt)
drop limit cens

* Report how many observations have no log wage
count if missing(lnw)

*	Multi-establishment firms
* count_est = number of distinct establishment-years (betnr jahr) per firm-year
egen flg_estjhr = tag(betnr jahr)
bysort untid jahr: egen count_est = total(flg_estjhr)

* d_mbu = 1 for firm-years with more than one establishment
gen aux_mbu = cond(count_est > 1, 1, 0)
bysort untid jahr: egen d_mbu = max(aux_mbu)

* Tag one observation per firm-year
capture drop flg_untjhr
egen flg_untjhr = tag(untid jahr)


********************************************************************************
***	Establishment characteristics **********************************************
********************************************************************************

* empl_est = number of non-missing persnr per establishment-year
bysort betnr jahr: egen empl_est = count(persnr)

* Attach the establishment-year file and keep only matched observations
merge m:1 betnr jahr using data/BHPinclUntID.dta
qui drop if _merge != 3
drop _merge

* Remove variables that are not used further below
drop count_est_siab chge_est entry_unt jahr_eins_final jahr_eins_est az_vz eintritt ///
	betnr_vor untid_vor verysmall matchtype d_multest_4 ever_multest_4 always_multest_4 vorg_died ///
	mode austritt betnr_nach nachf_willbenew untid_nach vorgaenger nachfolger ///
	erst_jahr_eintritt jahr_lzt_est first_year_ME entry_dyn exit_dyn exit_unt add_est mode_unt sector sector2 hq_sector

* NOTE(review): ao_bula is dropped from the panel data earlier in this script,
* so this relies on the merged file supplying ao_bula -- confirm.
order hq_kreis, after(ao_bula)

********************************************************************************
***	Layer classification based on KldB2010, managerial organization ************
********************************************************************************

* Attach the layer classification by occupation code. Occupation codes that
* appear only in the classification file are discarded; observations without
* a match in the classification file are kept.
merge m:1 beruf2010 using data/KldB2010_LayerCMRHFriedrich.dta
qui keep if _merge != 2
drop _merge

* Layer assignments set by hand for individual occupation codes
qui replace layer_neuAG = 3 if beruf2010 == 73294
qui replace layer_neuAG = 2 if beruf2010 == 71224
qui replace layer_neuAG = 1 if beruf2010 == 1104
qui replace layer_neuAG = 0 if inlist(beruf2010, 1402, 1302, 1203)

* layer_neuAG replaces the existing layer variable
qui drop layer
rename layer_neuAG layer

* Rank the layers that are present in each firm-year from the bottom up:
* 0 = lowest layer present, 1 = next layer present, and so on. Ranks are
* therefore consecutive even if a firm-year skips a layer.
*
* Observations with a missing layer are excluded from every step and keep a
* missing layer_rank. Stata treats missing as larger than any number and
* evaluates . == . as true, so without the !missing(layer) guards such
* observations would be ranked as an additional top layer in firm-years with
* fewer than four observed layers (and as rank 0 where all layers are missing).
bys untid jahr: egen lowest = min(layer)
bys untid jahr: egen second_lowest = min(layer) if layer > lowest & !missing(layer)
bys untid jahr: egen third_lowest = min(layer) if layer > second_lowest & !missing(layer)
bys untid jahr: egen highest = min(layer) if layer > third_lowest & !missing(layer)

gen layer_rank = .
replace layer_rank = 0 if layer == lowest & !missing(layer)
replace layer_rank = 1 if layer == second_lowest & !missing(layer) & layer_rank == .
replace layer_rank = 2 if layer == third_lowest & !missing(layer) & layer_rank == .
replace layer_rank = 3 if layer == highest & !missing(layer) & layer_rank == .
drop lowest *_lowest highest

* Cross-check ranks against layers, including missing values
tab2 layer_rank layer, missing

// Number of distinct layers per firm-year
egen flg_untjhrlay = tag(untid jahr layer)
bysort untid jahr: egen count_lay_unt = total(flg_untjhrlay)
drop flg_untjhrlay

// Pattern of layers per firm-year: l_0 ... l_3 indicate whether any
// observation of the firm-year is in layer 0 ... 3
sort untid jahr
qui {
	forvalues l = 0/3 {
		gen byte e_`l' = (layer == `l')
		by untid jahr: egen l_`l' = max(e_`l')
		label variable l_`l' "Dummy firm has layer `l'"
		drop e_`l'
	}
}

// Encode the four indicators as one number: thousands digit = layer 3,
// hundreds = layer 2, tens = layer 1, units = layer 0
capture drop comb_layer_unt
gen comb_layer_unt = l_3 * 1000 + l_2 * 100 + l_1 * 10 + l_0
label variable comb_layer_unt "Pattern of layers, from high to low, firm level"
label define structure ///
	1 "0" 10 "1" 100 "2" 1000 "3" ///
	11 "0 + 1" 101 "0 + 2" 1001 "0 + 3" ///
	110 "1 + 2" 1010 "1 + 3" 1100 "2 + 3" ///
	111 "0 + 1 + 2" 1101 "0 + 2 + 3" 1011 "0 + 1 + 3" ///
	1110 "1 + 2 + 3" 1111 "All layers"
label values comb_layer_unt structure
drop l_*

// Number of managerial layers per firm-year: distinct layer ranks, not
// counting rank 0
egen flg_untjhrlay = tag(untid jahr layer_rank)
qui replace flg_untjhrlay = 0 if layer_rank == 0
bysort untid jahr: egen count_mgmt_unt = total(flg_untjhrlay)
drop flg_untjhrlay

********************************************************************************
***	Save individual-level data *************************************************
********************************************************************************

* Write the worker-level extract; the full data set is restored afterwards
preserve

* Keep the worker-level variables and rebuild the log wage from tentgelt
keep betnr jahr persnr tentgelt d_educ tage_bet layer layer_rank w08_5 beruf2010
gen lnw = ln(tentgelt)

save "data/CMRH_2012_p_layer-ind.dta", replace

restore

********************************************************************************
***	Firm characteristics *******************************************************
********************************************************************************

// Firm-year sums of wages, education and tenure over all observations
local src_wages  tentgelt
local src_educ   d_educ
local src_tenure tage_bet
foreach s in wages educ tenure {
	bysort untid jahr: egen tot_`s' = total(`src_`s'')
}

// Reduce to firm-level variables (one row per firm-year) and save them;
// the worker-level data are restored afterwards.
// NOTE(review): empl_unt and count_est were computed before the
// establishment merge kept only matched observations, whereas the tot_*
// sums are computed after it -- confirm this difference is intended.
preserve
keep untid jahr empl_unt tot_* count_lay_unt comb_layer_unt count_mgmt_unt count_est

duplicates drop
duplicates report untid jahr
tab jahr, missing

label variable empl_unt       "# of employees in firm"
label variable tot_wages      "Wage sum in firm"
label variable tot_educ       "Education sum in firm"
label variable count_lay_unt  "# of layers in firm"
label variable count_mgmt_unt "# of mgmt layers in firm"
describe

save data/firmlevel_CMRH_cs.dta, replace
restore

********************************************************************************
***	Hierarchy characteristics **************************************************
********************************************************************************

* Head count plus wage, education and tenure sums per firm-year and layer rank
bysort untid jahr layer_rank: egen empl_lyr = count(persnr)
local src_wage   tentgelt
local src_educ   d_educ
local src_tenure tage_bet
foreach s in wage educ tenure {
	bysort untid jahr layer_rank: egen `s'_lyr = total(`src_`s'')
}

* Keep one row per firm-year-rank. tag() never tags rows with a missing
* layer_rank, so observations without a rank are dropped here.
egen flg_untjhrlay = tag(untid jahr layer_rank)
drop if flg_untjhrlay != 1
drop flg_untjhrlay

* One row per firm-year with a set of columns per layer rank
keep wage_lyr empl_lyr educ_lyr tenure_lyr jahr untid layer_rank
reshape wide wage_lyr empl_lyr educ_lyr tenure_lyr, i(jahr untid) j(layer_rank)

********************************************************************************
***	Combine data ***************************************************************
********************************************************************************

* Add the firm-level variables; firm-years that exist only in the firm-level
* file are discarded
merge 1:1 untid jahr using data/firmlevel_CMRH_cs.dta
keep if _merge != 2
drop _merge

* Remove any tags left over from earlier steps
capture drop flg_unt
capture drop flg_untjhr

* Tag one row per firm
egen flg_unt = tag(untid)

* Declare the firm-year panel structure
xtset, clear
sort untid jahr
xtset untid jahr

* Tag one row per firm-year
egen flg_untjhr = tag(untid jahr)


************************************
** Other supporting variables  *****
************************************
* Average wage per firm-year
gen avg_wage = tot_wages / empl_unt

* Flag firms whose layers are adjacent and start at layer 0
* (patterns "0", "0 + 1", "0 + 1 + 2" and "All layers")
gen correctlyr = inlist(comb_layer_unt, 1, 11, 111, 1111)
*keep if correctlyr==1 	// remove the asterisk and let this instruction be executed to
						// produce tables with consecutively ordered layers.

* Average wage, education and tenure within each layer rank
foreach v in wage educ tenure {
	forvalues l = 0/3 {
		gen avg_`v'_lyr`l' = `v'_lyr`l' / empl_lyr`l'
	}
}

* Log number of employees
gen ln_empl_unt = ln(empl_unt)

* Log average wage
gen lavgw = ln(avg_wage)

sort untid jahr

* Normalized employment: head count of each layer rank divided by the head
* count of the rank equal to count_mgmt_unt. Ranks above count_mgmt_unt stay
* missing.
forvalues c = 0/3 {
	gen norm_empl_lyr`c' = .
}
forvalues top = 0/3 {
	forvalues c = 0/`top' {
		replace norm_empl_lyr`c' = empl_lyr`c' / empl_lyr`top' if count_mgmt_unt == `top'
	}
}

* Sum of the normalized head counts; rowtotal() treats missing as zero
egen norm_empl = rowtotal(norm_empl_lyr*)

compress
save "data/CMRH_2012_cs_layer-unt.dta", replace

********************************************************************************
********************************************************************************

log close
